#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import division
import sys, argparse
import csv
import time as t
import numpy as np
import datetime as d
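'''
Statistics (mean, min, max, std) of the time distance between two consecutive
actions of a user on the current course, computed per enrollment.
'''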

# Placeholder value emitted for every statistic of an action that has fewer
# than two recorded timestamps, i.e. when there is no gap to measure.
EMPTY_ACTION = 0



def parse_args():
    '''
    Parse the command line and return the options as a dict.

    Returns a dict with the keys 'log' and 'output', holding the two
    positional arguments in that order.

    When the script is started without any argument, the argparse help text
    is printed and SystemExit is raised, exactly as if '-h' had been given.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('log')
    parser.add_argument('output')

    # Hand argparse an explicit argument list instead of appending '-h' to
    # sys.argv: the help behaviour is the same, but the process-wide
    # sys.argv is left untouched.
    if len(sys.argv) == 1:
        argv = ['-h']
    else:
        argv = sys.argv[1:]
    return vars(parser.parse_args(argv))

args = parse_args()
log, output = args['log'], args['output']

# Layout of the `time` column in the log, e.g. 2014-06-14T09:38:29.
time_format = "%Y-%m-%dT%H:%M:%S"

# enrollment_id -> {'overall': [datetime, ...], <event>: [datetime, ...]}
# 'overall' collects the timestamp of every event of the enrollment, while
# each event name collects only the timestamps of that kind of event.
user_action = {}

# `with` guarantees the log file is closed once it has been read.
with open(log) as log_file:
    reader = csv.reader(log_file)
    next(reader, None)  # skip the header row
    for enrollment_id, username, course_id, time, source, event, _object in reader:
        date = d.datetime.strptime(time, time_format)
        rcd = user_action.setdefault(enrollment_id, {'overall': []})
        rcd.setdefault(event, []).append(date)
        rcd['overall'].append(date)

# Build the feature table: open the output CSV and prepare its header row.
writer = csv.writer(open(output, 'w'))

# One column per (statistic, field) pair, grouped by statistic: all the
# "mean" columns first, then "min", "max" and "std".
# NOTE(review): 'nagivate' looks like a misspelling of 'navigate'; it is kept
# as is because the column names are part of the produced file - confirm with
# the consumers of this output before renaming.
header = ["enrollment_id"] + [
    "event_distance.%s.%s" % (field, action)
    for action in ("mean", "min", "max", "std")
    for field in ("overall", 'problem', 'video', 'access', 'wiki',
                  'discussion', 'nagivate', 'page_close')
]


def statis(rcd):
    '''
    Summarise the time gaps between consecutive actions of one enrollment.

    rcd maps an action name (including 'overall') to a list of datetimes.
    For every action the timestamps are sorted and the distance between each
    pair of neighbours is measured in hours.

    Returns a flat list of 32 values: the mean gap of each of the 8 actions
    (in the order of `actions` below), followed by the min, the max and the
    std in the same action order. An action with fewer than two timestamps
    has no gap and contributes EMPTY_ACTION to every statistic.
    '''
    actions = ["overall", 'problem', 'video', 'access', 'wiki',
               'discussion', 'nagivate', 'page_close']
    # NOTE(review): 'nagivate' looks like a misspelling of 'navigate'. An
    # event recorded as 'navigate' was never found under the misspelled key
    # and always produced EMPTY_ACTION. The misspelled key is still tried
    # first so existing data keeps working - confirm the spelling used by
    # the log.
    aliases = {'nagivate': 'navigate'}

    # The gaps do not depend on the statistic, so compute them once per
    # action instead of once per (statistic, action) pair.
    gaps_by_action = {}
    for action in actions:
        dates = rcd.get(action)
        if dates is None and action in aliases:
            dates = rcd.get(aliases[action])
        ordered = sorted(dates or [])
        gaps_by_action[action] = [
            (later - earlier).total_seconds() / 3600
            for earlier, later in zip(ordered, ordered[1:])
        ]

    res = []
    for method in (np.mean, np.min, np.max, np.std):
        for action in actions:
            gaps = gaps_by_action[action]
            res.append(method(gaps) if gaps else EMPTY_ACTION)
    return res



# Emit the header, then one feature row per enrollment.
writer.writerow(header)
for enrollment, actions in user_action.items():
    features = statis(actions)
    # 4 statistics x 8 fields; must line up with the header columns.
    assert len(features) == 32
    row = [enrollment] + features
    writer.writerow(row)


